#import python packages
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Read the fetal health CSV into a DataFrame and preview its first three rows
fetal_health = pd.read_csv(filepath_or_buffer='fetal_health.csv')
fetal_health.head(n=3)
| baseline value | accelerations | fetal_movement | uterine_contractions | light_decelerations | severe_decelerations | prolongued_decelerations | abnormal_short_term_variability | mean_value_of_short_term_variability | percentage_of_time_with_abnormal_long_term_variability | ... | histogram_min | histogram_max | histogram_number_of_peaks | histogram_number_of_zeroes | histogram_mode | histogram_mean | histogram_median | histogram_variance | histogram_tendency | fetal_health | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 120.0 | 0.000 | 0.0 | 0.000 | 0.000 | 0.0 | 0.0 | 73.0 | 0.5 | 43.0 | ... | 62.0 | 126.0 | 2.0 | 0.0 | 120.0 | 137.0 | 121.0 | 73.0 | 1.0 | 2.0 |
| 1 | 132.0 | 0.006 | 0.0 | 0.006 | 0.003 | 0.0 | 0.0 | 17.0 | 2.1 | 0.0 | ... | 68.0 | 198.0 | 6.0 | 1.0 | 141.0 | 136.0 | 140.0 | 12.0 | 0.0 | 1.0 |
| 2 | 133.0 | 0.003 | 0.0 | 0.008 | 0.003 | 0.0 | 0.0 | 16.0 | 2.1 | 0.0 | ... | 68.0 | 198.0 | 5.0 | 1.0 | 141.0 | 135.0 | 138.0 | 13.0 | 0.0 | 1.0 |
3 rows × 22 columns
# Summarise the DataFrame: index range, per-column non-null counts and dtypes,
# and overall memory usage
fetal_health.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2126 entries, 0 to 2125 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 baseline value 2126 non-null float64 1 accelerations 2126 non-null float64 2 fetal_movement 2126 non-null float64 3 uterine_contractions 2126 non-null float64 4 light_decelerations 2126 non-null float64 5 severe_decelerations 2126 non-null float64 6 prolongued_decelerations 2126 non-null float64 7 abnormal_short_term_variability 2126 non-null float64 8 mean_value_of_short_term_variability 2126 non-null float64 9 percentage_of_time_with_abnormal_long_term_variability 2126 non-null float64 10 mean_value_of_long_term_variability 2126 non-null float64 11 histogram_width 2126 non-null float64 12 histogram_min 2126 non-null float64 13 histogram_max 2126 non-null float64 14 histogram_number_of_peaks 2126 non-null float64 15 histogram_number_of_zeroes 2126 non-null float64 16 histogram_mode 2126 non-null float64 17 histogram_mean 2126 non-null float64 18 histogram_median 2126 non-null float64 19 histogram_variance 2126 non-null float64 20 histogram_tendency 2126 non-null float64 21 fetal_health 2126 non-null float64 dtypes: float64(22) memory usage: 365.5 KB
# Dimensions of the dataset as a (rows, columns) tuple
fetal_health.shape
(2126, 22)
# Data type of every column
fetal_health.dtypes
baseline value float64 accelerations float64 fetal_movement float64 uterine_contractions float64 light_decelerations float64 severe_decelerations float64 prolongued_decelerations float64 abnormal_short_term_variability float64 mean_value_of_short_term_variability float64 percentage_of_time_with_abnormal_long_term_variability float64 mean_value_of_long_term_variability float64 histogram_width float64 histogram_min float64 histogram_max float64 histogram_number_of_peaks float64 histogram_number_of_zeroes float64 histogram_mode float64 histogram_mean float64 histogram_median float64 histogram_variance float64 histogram_tendency float64 fetal_health float64 dtype: object
# For each column, flag whether it contains at least one missing (NaN) value
fetal_health.isna().any()
baseline value False accelerations False fetal_movement False uterine_contractions False light_decelerations False severe_decelerations False prolongued_decelerations False abnormal_short_term_variability False mean_value_of_short_term_variability False percentage_of_time_with_abnormal_long_term_variability False mean_value_of_long_term_variability False histogram_width False histogram_min False histogram_max False histogram_number_of_peaks False histogram_number_of_zeroes False histogram_mode False histogram_mean False histogram_median False histogram_variance False histogram_tendency False fetal_health False dtype: bool
# Summary statistics (count, mean, std, min, quartiles, max) for each column
fetal_health.describe()
| baseline value | accelerations | fetal_movement | uterine_contractions | light_decelerations | severe_decelerations | prolongued_decelerations | abnormal_short_term_variability | mean_value_of_short_term_variability | percentage_of_time_with_abnormal_long_term_variability | ... | histogram_min | histogram_max | histogram_number_of_peaks | histogram_number_of_zeroes | histogram_mode | histogram_mean | histogram_median | histogram_variance | histogram_tendency | fetal_health | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.00000 | ... | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 | 2126.000000 |
| mean | 133.303857 | 0.003178 | 0.009481 | 0.004366 | 0.001889 | 0.000003 | 0.000159 | 46.990122 | 1.332785 | 9.84666 | ... | 93.579492 | 164.025400 | 4.068203 | 0.323612 | 137.452023 | 134.610536 | 138.090310 | 18.808090 | 0.320320 | 1.304327 |
| std | 9.840844 | 0.003866 | 0.046666 | 0.002946 | 0.002960 | 0.000057 | 0.000590 | 17.192814 | 0.883241 | 18.39688 | ... | 29.560212 | 17.944183 | 2.949386 | 0.706059 | 16.381289 | 15.593596 | 14.466589 | 28.977636 | 0.610829 | 0.614377 |
| min | 106.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 12.000000 | 0.200000 | 0.00000 | ... | 50.000000 | 122.000000 | 0.000000 | 0.000000 | 60.000000 | 73.000000 | 77.000000 | 0.000000 | -1.000000 | 1.000000 |
| 25% | 126.000000 | 0.000000 | 0.000000 | 0.002000 | 0.000000 | 0.000000 | 0.000000 | 32.000000 | 0.700000 | 0.00000 | ... | 67.000000 | 152.000000 | 2.000000 | 0.000000 | 129.000000 | 125.000000 | 129.000000 | 2.000000 | 0.000000 | 1.000000 |
| 50% | 133.000000 | 0.002000 | 0.000000 | 0.004000 | 0.000000 | 0.000000 | 0.000000 | 49.000000 | 1.200000 | 0.00000 | ... | 93.000000 | 162.000000 | 3.000000 | 0.000000 | 139.000000 | 136.000000 | 139.000000 | 7.000000 | 0.000000 | 1.000000 |
| 75% | 140.000000 | 0.006000 | 0.003000 | 0.007000 | 0.003000 | 0.000000 | 0.000000 | 61.000000 | 1.700000 | 11.00000 | ... | 120.000000 | 174.000000 | 6.000000 | 0.000000 | 148.000000 | 145.000000 | 148.000000 | 24.000000 | 1.000000 | 1.000000 |
| max | 160.000000 | 0.019000 | 0.481000 | 0.015000 | 0.015000 | 0.001000 | 0.005000 | 87.000000 | 7.000000 | 91.00000 | ... | 159.000000 | 238.000000 | 18.000000 | 10.000000 | 187.000000 | 182.000000 | 186.000000 | 269.000000 | 1.000000 | 3.000000 |
8 rows × 22 columns
Upon examination, the dataset appears to be exceptionally clean: none of the 22 columns contains null values, and every column is stored in a numerical format (float64).
# Load python packages
import matplotlib.pyplot as plt
import seaborn as sns
# Bar chart of the number of records in each class of the target variable
# (fetal_health), drawn on an explicitly created 8x6 inch figure
count_fig, count_ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=fetal_health, x='fetal_health', ax=count_ax)
count_ax.set_title('Fetal Health Distribution')
count_ax.set_xlabel('Fetal Health')
plt.show()
# Tally how many records belong to each fetal health class
target_column = fetal_health['fetal_health']
class_counts = target_column.value_counts()
# Report the tally beneath a heading (heading and counts on separate lines)
print('Class distribution:', class_counts, sep='\n')
Class distribution: fetal_health 1.0 1655 2.0 295 3.0 176 Name: count, dtype: int64
# Correlation heatmap: pairwise correlations between all columns, annotated
# with their values and coloured on a fixed [-1, 1] scale
corr_matrix = fetal_health.corr()
plt.figure(figsize=(12, 12))
heatmap_ax = sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
heatmap_ax.set_title('Correlation Heatmap')
Text(0.5, 1.0, 'Correlation Heatmap')
# Grid of pairwise plots coloured by fetal health class, with histograms on
# the diagonal showing each variable's distribution
pairplot_options = {'hue': 'fetal_health', 'diag_kind': 'hist', 'palette': 'Set2'}
sns.pairplot(fetal_health, **pairplot_options)
plt.show()